package com.zhen.lucene.analyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import java.io.IOException;
import java.io.StringReader;

/**
 * Small command-line demo that runs Lucene's {@link StandardAnalyzer} over one
 * Chinese and one English sample sentence and prints the resulting tokens.
 */
public class StdAnalyzer {

    /** Chinese sample sentence fed to the analyzer. */
    private static final String STR_CN = "中华人名共和国简称中国，是一个有13亿人口的国家";

    /** English sample sentence fed to the analyzer. */
    private static final String STR_EN = "Dogs can not achieve a place,eyes can reach;";

    /**
     * Field name handed to {@link Analyzer#tokenStream}. The demo analyzes free
     * text rather than an indexed document field, so a fixed placeholder is
     * used instead of passing the text itself as the field name.
     */
    private static final String FIELD_NAME = "content";

    /**
     * Entry point: tokenizes both sample sentences and prints the tokens to
     * standard output.
     *
     * @param args ignored
     * @throws IOException if the analyzer fails while reading or tokenizing
     */
    public static void main(String[] args) throws IOException {
        System.out.println("StandardAnalyzer对中文分词:");
        stdAnalyzer(STR_CN);
        System.out.println("StandardAnalyzer对英文分词:");
        stdAnalyzer(STR_EN);
    }

    /**
     * Tokenizes {@code str} with a fresh {@link StandardAnalyzer} and prints
     * every token followed by {@code '|'} on a single line.
     *
     * @param str the text to tokenize
     * @throws IOException if the token stream fails while being consumed
     */
    private static void stdAnalyzer(String str) throws IOException {
        // try-with-resources closes the token stream first and the analyzer
        // second, even when tokenization throws part-way through.
        try (Analyzer analyzer = new StandardAnalyzer();
             TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(str))) {
            // Obtain the attribute before reset(), as the TokenStream consumer
            // workflow prescribes.
            CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            System.out.println("分词结果：");
            while (tokenStream.incrementToken()) {
                System.out.print(termAttribute.toString() + "|");
            }
            // end() must follow the last incrementToken() so the stream can
            // perform its end-of-stream operations before it is closed.
            tokenStream.end();
            System.out.println("\n");
        }
    }

}
